# from subprocess import call
## Attempt 1: download via Internet Download Manager (IDM) -- failed
# IDM = r'C:\Program Files (x86)\Internet Download Manager\IDMan.exe'
# DownPath = r'D:\1_work\python采集\国家食品药品监督管理总局\download_file'
# OutPutFileName = 'test.xls'
# urls = [
#     'http://samr.cfda.gov.cn/directory/web/WS01/images/localgov/gov_1482267824189.xls',
#     # 'http://samr.cfda.gov.cn/directory/web/WS01/images/localgov/gov_1543007053091.doc',
# ]
# for url in urls:
#     call([IDM, '/d', url, '/p', DownPath, '/f', OutPutFileName, '/n', '/a'])


# import requests
## Attempt 2: download via requests -- failed
# url = 'http://samr.cfda.gov.cn/directory/web/WS01/images/localgov/gov_1482267824189.xls'
# r = requests.get(url)
# print(r.content)
# file_name = 'D:/1_work/python采集/国家食品药品监督管理总局/download_file/{}.{}'.format('index', 'xlsx')
# with open(file_name, 'wb') as file:
#      for content in r.iter_content():
#         file.write(content)


## Attempt 3: download via urllib.request.urlretrieve -- failed
# import urllib.request
# urls = [
#     'http://samr.cfda.gov.cn/directory/web/WS01/images/localgov/gov_1482267824189.xls',
#     'http://samr.cfda.gov.cn/directory/web/WS01/images/localgov/gov_1543007053091.doc',
# ]
# for url in urls:
#     print(url)
#     file_name = 'D:/1_work/python采集/国家食品药品监督管理总局/download_file/{}.{}'.format('index', url.split('.')[-1])
#
#     urllib.request.urlretrieve(url, file_name)

# Attempt 4: download via selenium -- this one works!
from selenium import webdriver
from time import sleep
import pymysql
from multiprocessing.dummy import Pool as ThreadPool


def get_link():
    """Fetch every ``IR_URLNAME`` value from the ``xly`` table.

    Connects to the MySQL database ``app_mark`` on 127.0.0.1 using the
    credentials hard-coded below and runs a single SELECT.

    Returns:
        Whatever ``cursor.fetchall()`` yields for the query (one row per
        record, each row holding the single selected column), or ``None``
        if executing/fetching the query failed; in that case the error is
        printed rather than raised.

    Raises:
        Any exception raised by ``pymysql.connect`` itself is not caught
        here and propagates to the caller.
    """
    # NOTE(review): credentials are hard-coded; consider moving them to
    # configuration or environment variables.
    host = '127.0.0.1'
    db = 'app_mark'
    user = 'root'
    passwd = '123456'
    charset = 'utf8mb4'
    sql = 'select IR_URLNAME from xly'

    con = pymysql.connect(host=host, db=db, user=user, passwd=passwd, charset=charset)
    # The nested try/finally blocks guarantee that the cursor and the
    # connection are released even when cursor creation, rollback or commit
    # raises; previously those paths leaked the connection.
    try:
        cur = con.cursor()
        try:
            try:
                cur.execute(sql)
                results = cur.fetchall()
            except Exception as e:
                # Best-effort: report the failure and signal it with None.
                con.rollback()
                results = None
                print('error! ', e)
            else:
                con.commit()
        finally:
            cur.close()
    finally:
        con.close()
    return results


def download(url):
    """Open *url* in a fresh Chrome instance so that Chrome saves the file.

    A new browser is started for every call, pointed at ``url``, given a
    fixed five seconds, and then shut down. The browser is always shut down,
    even if navigating to the URL raises.

    Args:
        url: Address to open; it is printed before navigation.

    Returns:
        None.
    """
    opt = webdriver.ChromeOptions()
    prefs = {
        # Presumably makes Chrome download directly instead of showing a
        # prompt (the original author was not certain either).
        'profile.default_content_settings.popups': 0,
        # Directory Chrome is told to use for downloads.
        'download.default_directory': 'D:/1_work/python采集/国家食品药品监督管理总局/download_file/'
    }
    opt.add_experimental_option('prefs', prefs)
    # Headless mode is intentionally left disabled: the original author
    # observed that files were not downloaded with it turned on.
    # opt.add_argument('--headless')
    opt.add_argument('--no-sandbox')
    opt.add_argument('--disable-gpu')
    opt.add_argument('--disable-dev-shm-usage')
    # ``options=`` replaces the deprecated ``chrome_options=`` keyword, which
    # newer Selenium releases no longer accept.
    driver = webdriver.Chrome(options=opt)
    try:
        print(url)
        driver.get(url)
        # Fixed wait before closing the browser; there is no check that the
        # download has actually completed.
        sleep(5)
    finally:
        # Always close every window and stop the driver process, otherwise a
        # failing URL would leave an orphaned browser behind.
        driver.quit()


if __name__ == '__main__':
    # Load the stored URLs; a falsy result (None or no rows) means there is
    # nothing to download.
    rows = get_link()
    if rows:
        # Keep only the first column of every row.
        urls = [row[0] for row in rows]
        # Run the downloads on a pool of five worker threads.
        workers = ThreadPool(5)
        workers.map(download, urls)
        workers.close()
        workers.join()
